﻿//	Copyright (c) 2012, Michael Kunz. All rights reserved.
//	http://managedcuda.codeplex.com
//
//	This file is part of ManagedCuda.
//
//	ManagedCuda is free software: you can redistribute it and/or modify
//	it under the terms of the GNU Lesser General Public License as 
//	published by the Free Software Foundation, either version 2.1 of the 
//	License, or (at your option) any later version.
//
//	ManagedCuda is distributed in the hope that it will be useful,
//	but WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//	GNU Lesser General Public License for more details.
//
//	You should have received a copy of the GNU Lesser General Public
//	License along with this library; if not, write to the Free Software
//	Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
//	MA 02110-1301  USA, http://www.gnu.org/licenses/.

using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using ManagedCuda.BasicTypes;
using ManagedCuda.VectorTypes;
using System.Runtime.InteropServices;
using System.Diagnostics;

namespace ManagedCuda
{
	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: byte
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_byte: IDisposable, IEnumerable<byte>
	{
		IntPtr _intPtr;
		byte* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_byte(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(byte));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (byte*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_byte(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(byte));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (byte*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_byte(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (byte*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(byte));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_byte()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public byte this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<byte> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<byte> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<byte> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<byte> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<byte> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<byte> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<byte> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<byte> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<byte> IEnumerable<byte>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<byte> enumerator = new CudaPageLockedHostMemoryEnumerator_byte(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_byte(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_byte : IEnumerator<byte>
	{
		private CudaPageLockedHostMemory_byte _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_byte(CudaPageLockedHostMemory_byte memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public byte Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: uchar1
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_uchar1: IDisposable, IEnumerable<uchar1>
	{
		IntPtr _intPtr;
		uchar1* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_uchar1(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uchar1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uchar1(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uchar1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uchar1(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (uchar1*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar1));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_uchar1()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public uchar1 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uchar1> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<uchar1> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uchar1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<uchar1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uchar1> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uchar1> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uchar1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uchar1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<uchar1> IEnumerable<uchar1>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<uchar1> enumerator = new CudaPageLockedHostMemoryEnumerator_uchar1(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_uchar1(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_uchar1 : IEnumerator<uchar1>
	{
		private CudaPageLockedHostMemory_uchar1 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_uchar1(CudaPageLockedHostMemory_uchar1 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public uchar1 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: uchar2
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_uchar2: IDisposable, IEnumerable<uchar2>
	{
		IntPtr _intPtr;
		uchar2* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_uchar2(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uchar2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uchar2(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uchar2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uchar2(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (uchar2*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar2));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_uchar2()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public uchar2 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uchar2> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<uchar2> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uchar2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<uchar2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uchar2> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uchar2> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uchar2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uchar2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<uchar2> IEnumerable<uchar2>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<uchar2> enumerator = new CudaPageLockedHostMemoryEnumerator_uchar2(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_uchar2(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_uchar2 : IEnumerator<uchar2>
	{
		private CudaPageLockedHostMemory_uchar2 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_uchar2(CudaPageLockedHostMemory_uchar2 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public uchar2 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: uchar3
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_uchar3: IDisposable, IEnumerable<uchar3>
	{
		IntPtr _intPtr;
		uchar3* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_uchar3(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uchar3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uchar3(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uchar3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uchar3(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (uchar3*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar3));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_uchar3()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public uchar3 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uchar3> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<uchar3> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uchar3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<uchar3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uchar3> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uchar3> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uchar3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uchar3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<uchar3> IEnumerable<uchar3>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<uchar3> enumerator = new CudaPageLockedHostMemoryEnumerator_uchar3(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_uchar3(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_uchar3 : IEnumerator<uchar3>
	{
		private CudaPageLockedHostMemory_uchar3 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_uchar3(CudaPageLockedHostMemory_uchar3 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public uchar3 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: uchar4
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_uchar4: IDisposable, IEnumerable<uchar4>
	{
		IntPtr _intPtr;
		uchar4* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_uchar4(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uchar4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uchar4(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uchar4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uchar4(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (uchar4*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uchar4));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_uchar4()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public uchar4 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uchar4> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<uchar4> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uchar4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<uchar4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uchar4> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uchar4> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uchar4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uchar4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<uchar4> IEnumerable<uchar4>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<uchar4> enumerator = new CudaPageLockedHostMemoryEnumerator_uchar4(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_uchar4(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_uchar4 : IEnumerator<uchar4>
	{
		private CudaPageLockedHostMemory_uchar4 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_uchar4(CudaPageLockedHostMemory_uchar4 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public uchar4 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: sbyte
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_sbyte: IDisposable, IEnumerable<sbyte>
	{
		IntPtr _intPtr;
		sbyte* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_sbyte(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(sbyte));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (sbyte*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_sbyte(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(sbyte));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (sbyte*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_sbyte(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (sbyte*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(sbyte));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_sbyte()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public sbyte this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<sbyte> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<sbyte> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<sbyte> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<sbyte> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<sbyte> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<sbyte> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<sbyte> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<sbyte> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<sbyte> IEnumerable<sbyte>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<sbyte> enumerator = new CudaPageLockedHostMemoryEnumerator_sbyte(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_sbyte(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_sbyte : IEnumerator<sbyte>
	{
		private CudaPageLockedHostMemory_sbyte _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_sbyte(CudaPageLockedHostMemory_sbyte memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public sbyte Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: char1
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_char1: IDisposable, IEnumerable<char1>
	{
		IntPtr _intPtr;
		char1* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_char1(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (char1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_char1(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (char1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_char1(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (char1*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char1));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_char1()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public char1 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<char1> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<char1> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<char1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<char1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<char1> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<char1> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<char1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<char1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<char1> IEnumerable<char1>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<char1> enumerator = new CudaPageLockedHostMemoryEnumerator_char1(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_char1(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_char1 : IEnumerator<char1>
	{
		private CudaPageLockedHostMemory_char1 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_char1(CudaPageLockedHostMemory_char1 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public char1 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: char2
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_char2: IDisposable, IEnumerable<char2>
	{
		IntPtr _intPtr;
		char2* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_char2(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (char2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_char2(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (char2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_char2(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (char2*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char2));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_char2()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public char2 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<char2> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<char2> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<char2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<char2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<char2> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<char2> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<char2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<char2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<char2> IEnumerable<char2>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<char2> enumerator = new CudaPageLockedHostMemoryEnumerator_char2(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_char2(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_char2 : IEnumerator<char2>
	{
		private CudaPageLockedHostMemory_char2 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_char2(CudaPageLockedHostMemory_char2 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public char2 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: char3
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_char3: IDisposable, IEnumerable<char3>
	{
		IntPtr _intPtr;
		char3* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_char3(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (char3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_char3(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (char3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_char3(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (char3*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char3));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_char3()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public char3 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<char3> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<char3> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<char3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<char3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<char3> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<char3> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<char3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<char3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<char3> IEnumerable<char3>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<char3> enumerator = new CudaPageLockedHostMemoryEnumerator_char3(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_char3(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_char3 : IEnumerator<char3>
	{
		private CudaPageLockedHostMemory_char3 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_char3(CudaPageLockedHostMemory_char3 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public char3 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: char4
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_char4: IDisposable, IEnumerable<char4>
	{
		IntPtr _intPtr;
		char4* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_char4(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (char4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_char4(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (char4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_char4(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (char4*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(char4));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_char4()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public char4 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<char4> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<char4> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<char4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<char4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<char4> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<char4> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<char4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<char4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<char4> IEnumerable<char4>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<char4> enumerator = new CudaPageLockedHostMemoryEnumerator_char4(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_char4(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_char4 : IEnumerator<char4>
	{
		private CudaPageLockedHostMemory_char4 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_char4(CudaPageLockedHostMemory_char4 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public char4 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: short
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_short: IDisposable, IEnumerable<short>
	{
		IntPtr _intPtr;
		short* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_short(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (short*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_short(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (short*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_short(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (short*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_short()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public short this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<short> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<short> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<short> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<short> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<short> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<short> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<short> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<short> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<short> IEnumerable<short>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<short> enumerator = new CudaPageLockedHostMemoryEnumerator_short(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_short(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_short : IEnumerator<short>
	{
		private CudaPageLockedHostMemory_short _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_short(CudaPageLockedHostMemory_short memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public short Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: short1
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_short1: IDisposable, IEnumerable<short1>
	{
		IntPtr _intPtr;
		short1* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_short1(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (short1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_short1(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (short1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_short1(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (short1*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short1));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_short1()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public short1 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<short1> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<short1> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<short1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<short1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<short1> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<short1> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<short1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<short1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<short1> IEnumerable<short1>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<short1> enumerator = new CudaPageLockedHostMemoryEnumerator_short1(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_short1(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_short1 : IEnumerator<short1>
	{
		private CudaPageLockedHostMemory_short1 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_short1(CudaPageLockedHostMemory_short1 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public short1 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: short2
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_short2: IDisposable, IEnumerable<short2>
	{
		IntPtr _intPtr;
		short2* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_short2(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (short2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_short2(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (short2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_short2(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (short2*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short2));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_short2()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public short2 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<short2> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<short2> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<short2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<short2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<short2> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<short2> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<short2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<short2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<short2> IEnumerable<short2>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<short2> enumerator = new CudaPageLockedHostMemoryEnumerator_short2(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_short2(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_short2 : IEnumerator<short2>
	{
		private CudaPageLockedHostMemory_short2 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_short2(CudaPageLockedHostMemory_short2 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public short2 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: short3
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_short3: IDisposable, IEnumerable<short3>
	{
		IntPtr _intPtr;
		short3* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_short3(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (short3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_short3(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (short3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_short3(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (short3*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short3));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_short3()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public short3 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<short3> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<short3> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<short3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<short3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<short3> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<short3> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<short3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<short3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<short3> IEnumerable<short3>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<short3> enumerator = new CudaPageLockedHostMemoryEnumerator_short3(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_short3(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_short3 : IEnumerator<short3>
	{
		private CudaPageLockedHostMemory_short3 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_short3(CudaPageLockedHostMemory_short3 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public short3 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: short4
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_short4: IDisposable, IEnumerable<short4>
	{
		IntPtr _intPtr;
		short4* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_short4(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (short4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_short4(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (short4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_short4(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (short4*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(short4));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_short4()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public short4 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<short4> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<short4> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<short4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<short4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<short4> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<short4> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<short4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<short4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<short4> IEnumerable<short4>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<short4> enumerator = new CudaPageLockedHostMemoryEnumerator_short4(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_short4(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_short4 : IEnumerator<short4>
	{
		private CudaPageLockedHostMemory_short4 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_short4(CudaPageLockedHostMemory_short4 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public short4 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: ushort
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_ushort: IDisposable, IEnumerable<ushort>
	{
		IntPtr _intPtr;
		ushort* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_ushort(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ushort*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ushort(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ushort*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ushort(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (ushort*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_ushort()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public ushort this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ushort> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<ushort> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ushort> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<ushort> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ushort> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ushort> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ushort> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ushort> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<ushort> IEnumerable<ushort>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<ushort> enumerator = new CudaPageLockedHostMemoryEnumerator_ushort(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_ushort(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_ushort : IEnumerator<ushort>
	{
		private CudaPageLockedHostMemory_ushort _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_ushort(CudaPageLockedHostMemory_ushort memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public ushort Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: ushort1
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_ushort1: IDisposable, IEnumerable<ushort1>
	{
		IntPtr _intPtr;
		ushort1* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_ushort1(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ushort1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ushort1(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ushort1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ushort1(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (ushort1*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort1));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_ushort1()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public ushort1 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ushort1> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<ushort1> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ushort1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<ushort1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ushort1> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ushort1> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ushort1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ushort1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<ushort1> IEnumerable<ushort1>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<ushort1> enumerator = new CudaPageLockedHostMemoryEnumerator_ushort1(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_ushort1(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_ushort1 : IEnumerator<ushort1>
	{
		private CudaPageLockedHostMemory_ushort1 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_ushort1(CudaPageLockedHostMemory_ushort1 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public ushort1 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: ushort2
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_ushort2: IDisposable, IEnumerable<ushort2>
	{
		IntPtr _intPtr;
		ushort2* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_ushort2(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ushort2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ushort2(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ushort2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ushort2(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (ushort2*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort2));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_ushort2()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public ushort2 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ushort2> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<ushort2> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ushort2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<ushort2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ushort2> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ushort2> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ushort2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ushort2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<ushort2> IEnumerable<ushort2>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<ushort2> enumerator = new CudaPageLockedHostMemoryEnumerator_ushort2(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_ushort2(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_ushort2 : IEnumerator<ushort2>
	{
		private CudaPageLockedHostMemory_ushort2 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_ushort2(CudaPageLockedHostMemory_ushort2 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public ushort2 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: ushort3
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_ushort3: IDisposable, IEnumerable<ushort3>
	{
		IntPtr _intPtr;
		ushort3* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_ushort3(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ushort3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ushort3(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ushort3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ushort3(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (ushort3*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort3));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_ushort3()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public ushort3 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ushort3> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<ushort3> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ushort3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<ushort3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ushort3> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ushort3> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ushort3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ushort3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<ushort3> IEnumerable<ushort3>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<ushort3> enumerator = new CudaPageLockedHostMemoryEnumerator_ushort3(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_ushort3(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_ushort3 : IEnumerator<ushort3>
	{
		private CudaPageLockedHostMemory_ushort3 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_ushort3(CudaPageLockedHostMemory_ushort3 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public ushort3 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: ushort4
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_ushort4: IDisposable, IEnumerable<ushort4>
	{
		IntPtr _intPtr;
		ushort4* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_ushort4(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ushort4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ushort4(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ushort4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ushort4(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (ushort4*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ushort4));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_ushort4()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public ushort4 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ushort4> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<ushort4> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ushort4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<ushort4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ushort4> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ushort4> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ushort4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ushort4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<ushort4> IEnumerable<ushort4>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<ushort4> enumerator = new CudaPageLockedHostMemoryEnumerator_ushort4(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_ushort4(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_ushort4 : IEnumerator<ushort4>
	{
		private CudaPageLockedHostMemory_ushort4 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_ushort4(CudaPageLockedHostMemory_ushort4 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public ushort4 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: int
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_int: IDisposable, IEnumerable<int>
	{
		IntPtr _intPtr;
		int* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_int(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (int*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_int(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (int*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_int(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (int*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_int()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public int this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<int> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<int> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<int> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<int> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<int> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<int> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<int> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<int> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<int> IEnumerable<int>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<int> enumerator = new CudaPageLockedHostMemoryEnumerator_int(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_int(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_int : IEnumerator<int>
	{
		private CudaPageLockedHostMemory_int _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_int(CudaPageLockedHostMemory_int memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public int Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: int1
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_int1: IDisposable, IEnumerable<int1>
	{
		IntPtr _intPtr;
		int1* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_int1(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (int1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_int1(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (int1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_int1(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (int1*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int1));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_int1()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public int1 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<int1> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<int1> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<int1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<int1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<int1> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<int1> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<int1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<int1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<int1> IEnumerable<int1>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<int1> enumerator = new CudaPageLockedHostMemoryEnumerator_int1(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_int1(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_int1 : IEnumerator<int1>
	{
		private CudaPageLockedHostMemory_int1 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_int1(CudaPageLockedHostMemory_int1 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public int1 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: int2
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_int2: IDisposable, IEnumerable<int2>
	{
		IntPtr _intPtr;
		int2* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_int2(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (int2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_int2(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (int2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_int2(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (int2*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int2));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_int2()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public int2 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<int2> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<int2> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<int2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<int2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<int2> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<int2> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<int2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<int2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<int2> IEnumerable<int2>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<int2> enumerator = new CudaPageLockedHostMemoryEnumerator_int2(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_int2(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_int2 : IEnumerator<int2>
	{
		private CudaPageLockedHostMemory_int2 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_int2(CudaPageLockedHostMemory_int2 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public int2 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: int3
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_int3: IDisposable, IEnumerable<int3>
	{
		IntPtr _intPtr;
		int3* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_int3(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (int3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_int3(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (int3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_int3(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (int3*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int3));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_int3()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public int3 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<int3> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<int3> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<int3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<int3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<int3> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<int3> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<int3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<int3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<int3> IEnumerable<int3>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<int3> enumerator = new CudaPageLockedHostMemoryEnumerator_int3(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_int3(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_int3 : IEnumerator<int3>
	{
		private CudaPageLockedHostMemory_int3 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_int3(CudaPageLockedHostMemory_int3 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public int3 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: int4
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_int4: IDisposable, IEnumerable<int4>
	{
		IntPtr _intPtr;
		int4* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_int4(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (int4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_int4(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (int4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_int4(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (int4*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(int4));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_int4()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public int4 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<int4> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<int4> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<int4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<int4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<int4> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<int4> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<int4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<int4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<int4> IEnumerable<int4>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<int4> enumerator = new CudaPageLockedHostMemoryEnumerator_int4(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_int4(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_int4 : IEnumerator<int4>
	{
		private CudaPageLockedHostMemory_int4 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_int4(CudaPageLockedHostMemory_int4 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public int4 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: uint
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_uint: IDisposable, IEnumerable<uint>
	{
		IntPtr _intPtr;
		uint* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_uint(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uint*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uint(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uint*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uint(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (uint*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_uint()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public uint this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uint> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<uint> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uint> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<uint> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uint> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uint> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uint> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uint> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<uint> IEnumerable<uint>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<uint> enumerator = new CudaPageLockedHostMemoryEnumerator_uint(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_uint(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_uint : IEnumerator<uint>
	{
		private CudaPageLockedHostMemory_uint _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_uint(CudaPageLockedHostMemory_uint memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public uint Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: uint1
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_uint1: IDisposable, IEnumerable<uint1>
	{
		IntPtr _intPtr;
		uint1* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_uint1(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uint1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uint1(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uint1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uint1(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (uint1*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint1));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_uint1()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public uint1 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uint1> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<uint1> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uint1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<uint1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uint1> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uint1> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uint1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uint1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<uint1> IEnumerable<uint1>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<uint1> enumerator = new CudaPageLockedHostMemoryEnumerator_uint1(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_uint1(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_uint1 : IEnumerator<uint1>
	{
		private CudaPageLockedHostMemory_uint1 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_uint1(CudaPageLockedHostMemory_uint1 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public uint1 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: uint2
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_uint2: IDisposable, IEnumerable<uint2>
	{
		IntPtr _intPtr;
		uint2* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_uint2(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uint2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uint2(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uint2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uint2(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (uint2*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint2));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_uint2()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public uint2 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uint2> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<uint2> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uint2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<uint2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uint2> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uint2> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uint2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uint2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<uint2> IEnumerable<uint2>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<uint2> enumerator = new CudaPageLockedHostMemoryEnumerator_uint2(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_uint2(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_uint2 : IEnumerator<uint2>
	{
		private CudaPageLockedHostMemory_uint2 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_uint2(CudaPageLockedHostMemory_uint2 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public uint2 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: uint3
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_uint3: IDisposable, IEnumerable<uint3>
	{
		IntPtr _intPtr;
		uint3* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_uint3(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uint3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uint3(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uint3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uint3(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (uint3*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint3));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_uint3()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public uint3 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uint3> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<uint3> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uint3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<uint3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uint3> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uint3> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uint3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uint3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<uint3> IEnumerable<uint3>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<uint3> enumerator = new CudaPageLockedHostMemoryEnumerator_uint3(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_uint3(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_uint3 : IEnumerator<uint3>
	{
		private CudaPageLockedHostMemory_uint3 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_uint3(CudaPageLockedHostMemory_uint3 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public uint3 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: uint4
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_uint4: IDisposable, IEnumerable<uint4>
	{
		IntPtr _intPtr;
		uint4* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_uint4(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uint4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uint4(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (uint4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_uint4(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (uint4*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(uint4));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_uint4()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public uint4 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uint4> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<uint4> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<uint4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<uint4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uint4> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uint4> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<uint4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<uint4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<uint4> IEnumerable<uint4>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<uint4> enumerator = new CudaPageLockedHostMemoryEnumerator_uint4(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_uint4(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_uint4 : IEnumerator<uint4>
	{
		private CudaPageLockedHostMemory_uint4 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_uint4(CudaPageLockedHostMemory_uint4 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public uint4 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: long
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_long: IDisposable, IEnumerable<long>
	{
		IntPtr _intPtr;
		long* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_long(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(long));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (long*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_long(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(long));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (long*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_long(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (long*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(long));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_long()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public long this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<long> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<long> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<long> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<long> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<long> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<long> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<long> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<long> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<long> IEnumerable<long>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<long> enumerator = new CudaPageLockedHostMemoryEnumerator_long(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_long(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_long : IEnumerator<long>
	{
		private CudaPageLockedHostMemory_long _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_long(CudaPageLockedHostMemory_long memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public long Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: long1
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_long1: IDisposable, IEnumerable<long1>
	{
		IntPtr _intPtr;
		long1* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_long1(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(long1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (long1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_long1(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(long1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (long1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_long1(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (long1*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(long1));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_long1()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public long1 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<long1> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<long1> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<long1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<long1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<long1> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<long1> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<long1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<long1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<long1> IEnumerable<long1>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<long1> enumerator = new CudaPageLockedHostMemoryEnumerator_long1(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_long1(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_long1 : IEnumerator<long1>
	{
		private CudaPageLockedHostMemory_long1 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_long1(CudaPageLockedHostMemory_long1 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public long1 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: long2
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_long2: IDisposable, IEnumerable<long2>
	{
		IntPtr _intPtr;
		long2* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_long2(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(long2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (long2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_long2(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(long2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (long2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_long2(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (long2*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(long2));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_long2()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public long2 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<long2> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<long2> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<long2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<long2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<long2> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<long2> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<long2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<long2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<long2> IEnumerable<long2>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<long2> enumerator = new CudaPageLockedHostMemoryEnumerator_long2(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_long2(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_long2 : IEnumerator<long2>
	{
		private CudaPageLockedHostMemory_long2 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_long2(CudaPageLockedHostMemory_long2 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public long2 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: ulong
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_ulong: IDisposable, IEnumerable<ulong>
	{
		IntPtr _intPtr;
		ulong* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_ulong(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ulong));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ulong*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ulong(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ulong));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ulong*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ulong(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (ulong*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ulong));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_ulong()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public ulong this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ulong> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<ulong> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ulong> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<ulong> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ulong> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ulong> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ulong> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ulong> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<ulong> IEnumerable<ulong>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<ulong> enumerator = new CudaPageLockedHostMemoryEnumerator_ulong(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_ulong(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_ulong : IEnumerator<ulong>
	{
		private CudaPageLockedHostMemory_ulong _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_ulong(CudaPageLockedHostMemory_ulong memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public ulong Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: ulong1
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_ulong1: IDisposable, IEnumerable<ulong1>
	{
		IntPtr _intPtr;
		ulong1* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_ulong1(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ulong1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ulong1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ulong1(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ulong1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ulong1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ulong1(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (ulong1*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ulong1));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_ulong1()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public ulong1 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ulong1> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<ulong1> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ulong1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<ulong1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ulong1> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ulong1> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ulong1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ulong1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<ulong1> IEnumerable<ulong1>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<ulong1> enumerator = new CudaPageLockedHostMemoryEnumerator_ulong1(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_ulong1(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_ulong1 : IEnumerator<ulong1>
	{
		private CudaPageLockedHostMemory_ulong1 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_ulong1(CudaPageLockedHostMemory_ulong1 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public ulong1 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: ulong2
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_ulong2: IDisposable, IEnumerable<ulong2>
	{
		IntPtr _intPtr;
		ulong2* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_ulong2(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ulong2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ulong2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ulong2(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ulong2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (ulong2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_ulong2(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (ulong2*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(ulong2));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_ulong2()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public ulong2 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ulong2> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<ulong2> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<ulong2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<ulong2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ulong2> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ulong2> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<ulong2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<ulong2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<ulong2> IEnumerable<ulong2>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<ulong2> enumerator = new CudaPageLockedHostMemoryEnumerator_ulong2(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_ulong2(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_ulong2 : IEnumerator<ulong2>
	{
		private CudaPageLockedHostMemory_ulong2 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_ulong2(CudaPageLockedHostMemory_ulong2 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public ulong2 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: float
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_float: IDisposable, IEnumerable<float>
	{
		IntPtr _intPtr;
		float* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_float(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (float*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_float(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (float*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_float(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (float*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_float()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public float this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<float> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<float> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<float> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<float> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<float> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<float> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<float> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<float> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<float> IEnumerable<float>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<float> enumerator = new CudaPageLockedHostMemoryEnumerator_float(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_float(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_float : IEnumerator<float>
	{
		private CudaPageLockedHostMemory_float _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_float(CudaPageLockedHostMemory_float memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public float Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: float1
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_float1: IDisposable, IEnumerable<float1>
	{
		IntPtr _intPtr;
		float1* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_float1(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (float1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_float1(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (float1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_float1(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (float1*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float1));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_float1()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public float1 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<float1> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<float1> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<float1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<float1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<float1> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<float1> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<float1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<float1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<float1> IEnumerable<float1>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<float1> enumerator = new CudaPageLockedHostMemoryEnumerator_float1(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_float1(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_float1 : IEnumerator<float1>
	{
		private CudaPageLockedHostMemory_float1 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_float1(CudaPageLockedHostMemory_float1 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public float1 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: float2
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_float2: IDisposable, IEnumerable<float2>
	{
		IntPtr _intPtr;
		float2* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_float2(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (float2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_float2(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (float2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_float2(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (float2*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float2));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_float2()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public float2 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<float2> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<float2> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<float2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<float2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<float2> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<float2> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<float2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<float2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<float2> IEnumerable<float2>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<float2> enumerator = new CudaPageLockedHostMemoryEnumerator_float2(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_float2(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_float2 : IEnumerator<float2>
	{
		private CudaPageLockedHostMemory_float2 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_float2(CudaPageLockedHostMemory_float2 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public float2 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: float3
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_float3: IDisposable, IEnumerable<float3>
	{
		IntPtr _intPtr;
		float3* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_float3(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (float3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_float3(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (float3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_float3(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (float3*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float3));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_float3()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public float3 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<float3> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<float3> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<float3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<float3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<float3> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<float3> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<float3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<float3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<float3> IEnumerable<float3>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<float3> enumerator = new CudaPageLockedHostMemoryEnumerator_float3(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_float3(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_float3 : IEnumerator<float3>
	{
		private CudaPageLockedHostMemory_float3 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_float3(CudaPageLockedHostMemory_float3 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public float3 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: float4
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_float4: IDisposable, IEnumerable<float4>
	{
		IntPtr _intPtr;
		float4* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_float4(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (float4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_float4(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float4));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (float4*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_float4(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (float4*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(float4));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_float4()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public float4 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<float4> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<float4> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<float4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<float4> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<float4> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<float4> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<float4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<float4> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<float4> IEnumerable<float4>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<float4> enumerator = new CudaPageLockedHostMemoryEnumerator_float4(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_float4(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_float4 : IEnumerator<float4>
	{
		private CudaPageLockedHostMemory_float4 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_float4(CudaPageLockedHostMemory_float4 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public float4 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: double
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_double: IDisposable, IEnumerable<double>
	{
		IntPtr _intPtr;
		double* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_double(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(double));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (double*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_double(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(double));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (double*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_double(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (double*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(double));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_double()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public double this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<double> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<double> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<double> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<double> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<double> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<double> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<double> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<double> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<double> IEnumerable<double>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<double> enumerator = new CudaPageLockedHostMemoryEnumerator_double(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_double(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_double : IEnumerator<double>
	{
		private CudaPageLockedHostMemory_double _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_double(CudaPageLockedHostMemory_double memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public double Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: double1
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_double1: IDisposable, IEnumerable<double1>
	{
		IntPtr _intPtr;
		double1* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_double1(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(double1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (double1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_double1(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(double1));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (double1*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_double1(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (double1*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(double1));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_double1()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public double1 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<double1> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<double1> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<double1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<double1> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<double1> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<double1> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<double1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<double1> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<double1> IEnumerable<double1>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<double1> enumerator = new CudaPageLockedHostMemoryEnumerator_double1(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_double1(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_double1 : IEnumerator<double1>
	{
		private CudaPageLockedHostMemory_double1 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_double1(CudaPageLockedHostMemory_double1 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public double1 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: double2
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_double2: IDisposable, IEnumerable<double2>
	{
		IntPtr _intPtr;
		double2* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_double2(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(double2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (double2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_double2(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(double2));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (double2*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_double2(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (double2*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(double2));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_double2()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public double2 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<double2> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<double2> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<double2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<double2> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<double2> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<double2> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<double2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<double2> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<double2> IEnumerable<double2>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<double2> enumerator = new CudaPageLockedHostMemoryEnumerator_double2(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_double2(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_double2 : IEnumerator<double2>
	{
		private CudaPageLockedHostMemory_double2 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_double2(CudaPageLockedHostMemory_double2 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public double2 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: cuDoubleComplex
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_cuDoubleComplex: IDisposable, IEnumerable<cuDoubleComplex>
	{
		IntPtr _intPtr;
		cuDoubleComplex* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_cuDoubleComplex(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuDoubleComplex));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (cuDoubleComplex*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_cuDoubleComplex(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuDoubleComplex));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (cuDoubleComplex*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_cuDoubleComplex(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (cuDoubleComplex*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuDoubleComplex));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_cuDoubleComplex()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public cuDoubleComplex this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<cuDoubleComplex> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<cuDoubleComplex> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<cuDoubleComplex> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<cuDoubleComplex> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<cuDoubleComplex> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<cuDoubleComplex> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<cuDoubleComplex> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<cuDoubleComplex> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<cuDoubleComplex> IEnumerable<cuDoubleComplex>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<cuDoubleComplex> enumerator = new CudaPageLockedHostMemoryEnumerator_cuDoubleComplex(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_cuDoubleComplex(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_cuDoubleComplex : IEnumerator<cuDoubleComplex>
	{
		private CudaPageLockedHostMemory_cuDoubleComplex _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_cuDoubleComplex(CudaPageLockedHostMemory_cuDoubleComplex memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public cuDoubleComplex Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: cuDoubleReal
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_cuDoubleReal: IDisposable, IEnumerable<cuDoubleReal>
	{
		IntPtr _intPtr;
		cuDoubleReal* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_cuDoubleReal(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuDoubleReal));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (cuDoubleReal*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_cuDoubleReal(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuDoubleReal));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (cuDoubleReal*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_cuDoubleReal(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (cuDoubleReal*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuDoubleReal));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_cuDoubleReal()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public cuDoubleReal this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<cuDoubleReal> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<cuDoubleReal> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<cuDoubleReal> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<cuDoubleReal> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<cuDoubleReal> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<cuDoubleReal> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<cuDoubleReal> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<cuDoubleReal> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<cuDoubleReal> IEnumerable<cuDoubleReal>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<cuDoubleReal> enumerator = new CudaPageLockedHostMemoryEnumerator_cuDoubleReal(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_cuDoubleReal(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_cuDoubleReal : IEnumerator<cuDoubleReal>
	{
		private CudaPageLockedHostMemory_cuDoubleReal _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_cuDoubleReal(CudaPageLockedHostMemory_cuDoubleReal memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public cuDoubleReal Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: cuFloatComplex
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_cuFloatComplex: IDisposable, IEnumerable<cuFloatComplex>
	{
		IntPtr _intPtr;
		cuFloatComplex* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_cuFloatComplex(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuFloatComplex));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (cuFloatComplex*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_cuFloatComplex(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuFloatComplex));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (cuFloatComplex*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_cuFloatComplex(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (cuFloatComplex*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuFloatComplex));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_cuFloatComplex()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public cuFloatComplex this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<cuFloatComplex> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<cuFloatComplex> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<cuFloatComplex> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<cuFloatComplex> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<cuFloatComplex> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<cuFloatComplex> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<cuFloatComplex> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<cuFloatComplex> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<cuFloatComplex> IEnumerable<cuFloatComplex>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<cuFloatComplex> enumerator = new CudaPageLockedHostMemoryEnumerator_cuFloatComplex(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_cuFloatComplex(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_cuFloatComplex : IEnumerator<cuFloatComplex>
	{
		private CudaPageLockedHostMemory_cuFloatComplex _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_cuFloatComplex(CudaPageLockedHostMemory_cuFloatComplex memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public cuFloatComplex Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: cuFloatReal
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_cuFloatReal: IDisposable, IEnumerable<cuFloatReal>
	{
		IntPtr _intPtr;
		cuFloatReal* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_cuFloatReal(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuFloatReal));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (cuFloatReal*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_cuFloatReal(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuFloatReal));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (cuFloatReal*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_cuFloatReal(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (cuFloatReal*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(cuFloatReal));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_cuFloatReal()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public cuFloatReal this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<cuFloatReal> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<cuFloatReal> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<cuFloatReal> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<cuFloatReal> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<cuFloatReal> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<cuFloatReal> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<cuFloatReal> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<cuFloatReal> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<cuFloatReal> IEnumerable<cuFloatReal>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<cuFloatReal> enumerator = new CudaPageLockedHostMemoryEnumerator_cuFloatReal(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_cuFloatReal(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_cuFloatReal : IEnumerator<cuFloatReal>
	{
		private CudaPageLockedHostMemory_cuFloatReal _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_cuFloatReal(CudaPageLockedHostMemory_cuFloatReal memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public cuFloatReal Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
	/// <summary>
	/// A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy.<para/>
	/// Type: dim3
	/// </summary>
	public unsafe class CudaPageLockedHostMemory_dim3: IDisposable, IEnumerable<dim3>
	{
		IntPtr _intPtr;
		dim3* _ptr;
		SizeT _size = 0;
		SizeT _typeSize = 0;
		CUResult res;
		bool disposed;
		bool _isOwner;

		#region Constructor
		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc
		/// </summary>
		/// <param name="size">In elements</param>
		/// <param name="allocFlags"></param>
		public CudaPageLockedHostMemory_dim3(SizeT size, CUMemHostAllocFlags allocFlags)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(dim3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemHostAlloc(ref _intPtr, _typeSize * size, allocFlags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (dim3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost
		/// </summary>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_dim3(SizeT size)
		{
			_intPtr = new IntPtr();
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(dim3));

			res = DriverAPINativeMethods.MemoryManagement.cuMemAllocHost_v2(ref _intPtr, _typeSize * size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostAlloc", res));
			if (res != CUResult.Success) throw new CudaException(res);
			_ptr = (dim3*) _intPtr;
			_isOwner = true;
		}

		/// <summary>
		/// Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory!
		/// hostPointer won't be freed while disposing.
		/// </summary>
		/// <param name="hostPointer"></param>
		/// <param name="size">In elements</param>
		public CudaPageLockedHostMemory_dim3(IntPtr hostPointer, SizeT size)
		{
			_intPtr = hostPointer;
			_ptr = (dim3*) _intPtr;
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(dim3));
			_isOwner = false;
		}

		/// <summary>
		/// For dispose
		/// </summary>
		~CudaPageLockedHostMemory_dim3()
		{
			Dispose(false);
		}
		#endregion

		#region Dispose
		/// <summary>
		/// Dispose
		/// </summary>
		public void Dispose()
		{
			Dispose(true);
			GC.SuppressFinalize(this);
		}

		/// <summary>
		/// For IDisposable
		/// </summary>
		/// <param name="fDisposing"></param>
		protected virtual void Dispose(bool fDisposing)
		{
			if (fDisposing && !disposed)
			{
				if (_isOwner)
				{
					res = DriverAPINativeMethods.MemoryManagement.cuMemFreeHost(_intPtr);
					Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemFreeHost", res));
				}
				disposed = true;
			}
			if (!fDisposing && !disposed)
				Debug.WriteLine(String.Format("ManagedCUDA not-disposed warning: {0}", this.GetType()));
		}
		#endregion

		#region Properties
		/// <summary>
		/// Pointer to pinned host memory.
		/// </summary>
		public IntPtr PinnedHostPointer
		{
			get { return _intPtr; }
		}

		/// <summary>
		/// Size in bytes
		/// </summary>
		public SizeT SizeInBytes
		{
			get { return _size * _typeSize; }
		}

		/// <summary>
		/// Size in elements
		/// </summary>
		public SizeT Size
		{
			get { return _size; }
		}

		/// <summary>
		/// Access array per element.
		/// </summary>
		/// <param name="index">index in elements</param>
		/// <returns></returns>
		public dim3 this[SizeT index]
		{
			get
			{
				return _ptr[index];
			}
			set
			{
				_ptr[index] = value;
			}
		}

		/// <summary>
		/// If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing.
		/// </summary>
		public bool IsOwner
		{
			get { return _isOwner; }
		}
		#endregion

		#region Synchron Copy Methods
		#region Array
		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoA_v2(deviceArray, offset, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoA", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyToArray1D(CUarray deviceArray)
		{
			SynchronCopyToArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyToArray1D(CudaArray1D array)
		{
			SynchronCopyToArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyToArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyToArray1D(array.CUArray, offset);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyAtoH_v2(this._intPtr, deviceArray, offset, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		public void SynchronCopyFromArray1D(CUarray deviceArray)
		{
			SynchronCopyFromArray1D(deviceArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array)
		{
			SynchronCopyFromArray1D(array.CUArray, 0);
		}

		/// <summary>
		/// Synchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="offset"></param>
		public void SynchronCopyFromArray1D(CudaArray1D array, SizeT offset)
		{
			SynchronCopyFromArray1D(array.CUArray, offset);
		}
		#endregion
		#region devicePtr
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr, this._intPtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToDevice(CudaDeviceVariable<dim3> devicePtr)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(this._intPtr, devicePtr, SizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		public void SynchronCopyToHost(CudaDeviceVariable<dim3> devicePtr)
		{
			SynchronCopyToHost(devicePtr.DevicePointer);
		}
		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyHtoD_v2(devicePtr + offsetDest, new IntPtr(this._intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoD", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToDevice(CudaDeviceVariable<dim3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToDevice(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(new IntPtr(this._intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoH", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Synchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		public void SynchronCopyToHost(CudaDeviceVariable<dim3> devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes)
		{
			SynchronCopyToHost(devicePtr.DevicePointer, offsetSrc, offsetDest, aSizeInBytes);
		}
		#endregion
		#endregion

		#region Asynchron Copy Methods
		#region Array
		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoAAsync_v2(deviceArray, offset, this._intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoAAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyToArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyToArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy host to 1D Array
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">in bytes</param>
		public void AsyncCopyToArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyToArray1D(array.CUArray, stream, offset);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream, SizeT offset)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAtoHAsync_v2(this._intPtr, deviceArray, offset, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyAtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="deviceArray"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CUarray deviceArray, CUstream stream)
		{
			AsyncCopyFromArray1D(deviceArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, 0);
		}

		/// <summary>
		/// Asynchron copy 1D Array to host
		/// </summary>
		/// <param name="array"></param>
		/// <param name="stream"></param>
		/// <param name="offset">bytes</param>
		public void AsyncCopyFromArray1D(CudaArray1D array, CUstream stream, SizeT offset)
		{
			AsyncCopyFromArray1D(array.CUArray, stream, offset);
		}
		#endregion
		#region DevicePtr
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr, _intPtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<dim3> deviceVar, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(_intPtr, devicePtr, SizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<dim3> deviceVar, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, stream);
		}
		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyHtoDAsync_v2(devicePtr + offsetDest, new IntPtr(_intPtr.ToInt64() + (long)offsetSrc), aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyHtoDAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron Copy host to device
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyToDevice(CudaDeviceVariable<dim3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyToDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="devicePtr">Pointer to device memory</param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CUdeviceptr devicePtr, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyDtoHAsync_v2(new IntPtr(_intPtr.ToInt64() + (long)offsetDest), devicePtr + offsetSrc, aSizeInBytes, stream);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemcpyDtoHAsync", res));
			if (res != CUResult.Success) throw new CudaException(res);
		}

		/// <summary>
		/// Asynchron copy device to host
		/// </summary>
		/// <param name="deviceVar"></param>
		/// <param name="offsetSrc">Offset to source pointer in bytes</param>
		/// <param name="offsetDest">Offset to destination pointer in bytes</param>
		/// <param name="aSizeInBytes">Bytes to copy</param>
		/// <param name="stream"></param>
		public void AsyncCopyFromDevice(CudaDeviceVariable<dim3> deviceVar, SizeT offsetSrc, SizeT offsetDest, SizeT aSizeInBytes, CUstream stream)
		{
			AsyncCopyFromDevice(deviceVar.DevicePointer, offsetSrc, offsetDest, aSizeInBytes, stream);
		}
		#endregion



		#endregion

		#region Methods
		/// <summary>
		/// Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag <see cref="CUCtxFlags.MapHost"/>
		/// </summary>
		/// <returns>Device Pointer</returns>
		public CUdeviceptr GetDevicePointer()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUdeviceptr ptr = new CUdeviceptr();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetDevicePointer_v2(ref ptr, _intPtr, 0);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return ptr;
		}

		/// <summary>
		/// Passes back the flags that were specified when allocating the pinned host buffer
		/// </summary>
		/// <returns></returns>
		public CUMemHostAllocFlags GetAllocFlags()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			CUMemHostAllocFlags flags = new CUMemHostAllocFlags();
			res = DriverAPINativeMethods.MemoryManagement.cuMemHostGetFlags(ref flags, _intPtr);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuMemHostGetDevicePointer", res));
			if (res != CUResult.Success) throw new CudaException(res);
			return flags;
		}
		#endregion

		#region IEnumerable
		IEnumerator<dim3> IEnumerable<dim3>.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator<dim3> enumerator = new CudaPageLockedHostMemoryEnumerator_dim3(this);
			return enumerator;
		}

		IEnumerator IEnumerable.GetEnumerator()
		{
			if (disposed) throw new ObjectDisposedException(this.ToString());
			IEnumerator enumerator = new CudaPageLockedHostMemoryEnumerator_dim3(this);
			return enumerator;
		}

		#endregion
	}
	
	/// <summary>
	/// Enumerator class for CudaPageLockedHostMemory
	/// </summary>
	public class CudaPageLockedHostMemoryEnumerator_dim3 : IEnumerator<dim3>
	{
		private CudaPageLockedHostMemory_dim3 _memory = null;
		private SizeT _currentIndex = -1;

		/// <summary>
		/// 
		/// </summary>
		/// <param name="memory"></param>
		public CudaPageLockedHostMemoryEnumerator_dim3(CudaPageLockedHostMemory_dim3 memory)
		{
			_memory = memory;
		}

		void IDisposable.Dispose() { }

		/// <summary>
		/// 
		/// </summary>
		public void Reset()
		{
			_currentIndex = -1;
		}

		/// <summary>
		/// 
		/// </summary>
		public dim3 Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		object IEnumerator.Current
		{
			get { return _memory[_currentIndex]; }
		}

		/// <summary>
		/// 
		/// </summary>
		/// <returns></returns>
		public bool MoveNext()
		{
			_currentIndex += 1;
			if ((long)_currentIndex >= (long)_memory.Size)
				return false;
			else
				return true;
		}

	}

	
}
